knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

0.1 Introduction

Mental health challenges among university students are a growing concern, with academic stress often contributing to anxiety and depression. This analysis examines the impact of exam and non-exam periods on SSRI and SNRI prescription rates across Scottish health boards from 2018 to 2023.

Key questions include: - How do prescription rates differ between exam and non-exam periods? - How do demographic factors, such as the percentage of young adults (17–25), influence trends? - Which health boards show the highest changes in prescription rates?

By integrating prescription, demographic, and geographic data, this study highlights seasonal and regional variations in mental health support needs, offering insights for targeted interventions and resource allocation.

library(janitor) 
library(here) 
library(dbplyr)
library(tidyverse)

1 Loading and Cleaning Datasets

1.1 Load and clean demographic datasets - “HB_names”, “population_data” and “age_data”

  • HB_names dataset is processed to select only the health board code and names.
  • population_data is processed to extract the total population of each health board.
  • age_data is processed to extract the total number of young adults aged 17 to 25, the typical age range of university students. The dataset is then joined with the population_data to create a column of percentage. This would give a clear view of which health board regions to consider in the analysis.
library(stringr)

# Load and clean Health board names data
HB_names <- read_csv("C:/Data_Science/B223640/data/HB_names.csv") %>%
  clean_names() %>% 
  select(hb, hb_name) %>% 
  rename(hbt = hb) %>%
  mutate(hb_name = str_remove(hb_name, "^NHS\\s"))

# Load and clean population data
population_data <- read_csv("C:/Data_Science/B223640/data/UV103_age_health_board_census.csv", skip = 10) %>%
  rename(hb_name = "Health Board Area 2019",
         hb_population = Count) %>% 
  filter(Age == "All people" & Sex == "All people") %>% 
  select(hb_name, hb_population) %>% 
  mutate(hb_name = str_remove(hb_name, "^NHS\\s")) %>% 
  arrange(desc(hb_population))

age_data <- read.csv("C:/Data_Science/B223640/data/UV103_age_health_board_census.csv", skip = 10) %>%
  filter(str_detect(Age, "17|18|19|20|21|22|23|24|25")) %>%
  rename(hb_name = "Health.Board.Area.2019") %>% 
  group_by(hb_name) %>% 
  summarise(totalpeople = sum(Count)) %>% 
  left_join(population_data, by = "hb_name") %>% 
  mutate(percentage = totalpeople * 100 / hb_population) %>% 
  arrange(desc(percentage))

1.2 Loading the Exam Season and Non-Exam Season Datasets from 2018 to 2023

The exam season includes: - Semester 1 finals (November and December). - Semester 2 finals (April and May), as SSRI and SNRI treatments typically require at least four weeks to take effect (Ankrom, 2024).

The non-exam season is defined as: - The first two months of the university semester: - Semester 1 start: September and October. - Semester 2 start: January and February. - During these periods, students typically experience lighter workloads and less stress compared to exam periods.

library(tidyverse)

# Define column_types first to ensure consistent data parsing across all files by explicitly defining how each column should be interpreted
column_types <- cols(
  HBT = col_character(),
  DMDCode = col_character(),
  BNFItemCode = col_character(),
  BNFItemDescription = col_character(),
  PrescribedType = col_character(),
  GPPractice = col_double(),
  NumberOfPaidItems = col_double(),
  PaidQuantity = col_double(),
  GrossIngredientCost = col_double(),
  PaidDateMonth = col_double()
)

# List all CSV files in the "examszn_18-23" folder
examfiles <- list.files("C:/Data_Science/B223640/data/examszn_18-23", pattern = "csv", full.names = TRUE)
examszndata <- examfiles %>% 
  map_dfr(~read_csv(., col_types = column_types))

# List all CSV files in the "nonexamszn_18-23" folder
nonexamfiles <- list.files("C:/Data_Science/B223640/data/nonexamszn_18-23", pattern = "csv", full.names = TRUE)
nonexamszndata <- nonexamfiles %>% 
  map_dfr(~read_csv(., col_types = column_types))

2 Processing Datasets into Wide Format

This code generates a table that compares prescription per capita for SSRIs and SNRIs during exam and non-exam periods across Scottish health boards.

2.0.1 Steps:

1.- Calculate the prescription rates for SSRIs and SNRIs per capita for both exam and non-exam periods. - This accounts for the total population in each health board.

    • Incorporate the percentage of individuals aged 18–25 from the demographic data for each health board.
    • Compute the percentage change in prescription rates between exam and non-exam periods to highlight differences.
    • Use the gt package to create a clean and visually appealing table for presentation.
# Load required libraries
library(tidyverse)
library(gt)
library(dplyr)

# Define a reusable function to process wide-format data
process_wide_data <- function(data) {
  data %>%
    clean_names() %>%
    mutate(bnf_item_description = str_remove(bnf_item_description, "[_\\s].*")) %>%
    full_join(HB_names) %>%
    full_join(age_data) %>%
    filter(str_detect(bnf_item_description, "ESCITALOPRAM|SERTRALINE|FLUOXETINE|VENLAFAXINE|PAROXETINE|CITALOPRAM")) %>%
    filter(str_detect(hb_name, "Lothian|Grampian|Greater Glasgow and Clyde|Tayside|Fife|Forth Valley|Lanarkshire")) %>%
    mutate(hb_name = str_remove(hb_name, "^NHS\\s")) %>%
    select(hb_name, bnf_item_description, paid_date_month, hb_population, paid_quantity) %>%
    group_by(hb_name, bnf_item_description) %>%
    summarise(total_prescriptions = sum(paid_quantity), .groups = "drop") %>%
    pivot_wider(
      names_from = bnf_item_description,
      values_from = total_prescriptions,
      values_fill = list(total_prescriptions = 0)
    ) %>%
    rowwise() %>%  # Ensure row-wise summation
    mutate(total_prescriptions = sum(c_across(CITALOPRAM:VENLAFAXINE), na.rm = TRUE)) %>%
    ungroup() %>%
    full_join(age_data) %>%
    drop_na()  # Remove rows with NA values
}

# Process exam and non-exam data using the function
examszndatawide <- process_wide_data(examszndata)
nonexamszndatawide <- process_wide_data(nonexamszndata)

# Add prescription per capita to examszndatawide and nonexamszndatawide
examszndatawide <- examszndatawide %>%
  mutate(prescription_per_capita = total_prescriptions / hb_population)

nonexamszndatawide <- nonexamszndatawide %>%
  mutate(prescription_per_capita = total_prescriptions / hb_population)
# Combine data for comparison, including the percentage column
combined_prescriptions <- examszndatawide %>%
  select(hb_name, prescription_per_capita_exam = prescription_per_capita, percentage) %>%
  left_join(
    nonexamszndatawide %>%
      select(hb_name, prescription_per_capita_non_exam = prescription_per_capita),
    by = "hb_name"
  )
# Add a change column to show the percentage difference between exam and non-exam seasons
combined_prescriptions <- combined_prescriptions %>%
  mutate(
    change_percent = ((prescription_per_capita_exam - prescription_per_capita_non_exam) / prescription_per_capita_non_exam) * 100
  )

# Create a prettier table with grand_summary_rows
pretty_table <- combined_prescriptions %>%
  arrange(desc(prescription_per_capita_exam)) %>%
  gt() %>%
  tab_header(
    title = "Prescription Per Capita and Age Group Percentage",
    subtitle = "Exam Season vs Non-Exam Season (2018-2023)"
  ) %>%
  fmt_number(
    columns = vars(prescription_per_capita_exam, prescription_per_capita_non_exam, percentage, change_percent),
    decimals = 2
  ) %>%
  cols_label(
    hb_name = "Health Board",
    prescription_per_capita_exam = "Exam Season (Per Capita)",
    prescription_per_capita_non_exam = "Non-Exam Season (Per Capita)",
    percentage = "17–25 Age Group (%)",
    change_percent = "Change (%)"
  ) %>%
  tab_spanner(
    label = "Prescription Per Capita",
    columns = vars(prescription_per_capita_exam, prescription_per_capita_non_exam, change_percent)
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels(everything())
  ) %>%
  tab_style(
    style = cell_fill(color = "lightblue"),
    locations = cells_body(
      columns = vars(change_percent),
      rows = change_percent > 0
    )
  ) %>%
  tab_style(
    style = cell_fill(color = "lightpink"),
    locations = cells_body(
      columns = vars(change_percent),
      rows = change_percent < 0
    )
  ) %>%
  grand_summary_rows(
    columns = vars(prescription_per_capita_exam, prescription_per_capita_non_exam, percentage),
    fns = list(
      Avg = ~mean(., na.rm = TRUE)
    ),
    formatter = fmt_number,
    decimals = 2
  ) %>%
  tab_source_note(
    source_note = "Data sourced from 2018-2023 Prescription Records"
  )

# Print the improved table
pretty_table
Prescription Per Capita and Age Group Percentage
Exam Season vs Non-Exam Season (2018-2023)
Health Board
Prescription Per Capita
17–25 Age Group (%)
Exam Season (Per Capita) Non-Exam Season (Per Capita) Change (%)
Greater Glasgow and Clyde 83.41 80.25 3.93 25.06
Forth Valley 82.67 78.52 5.28 21.22
Fife 80.26 74.47 7.77 21.54
Lanarkshire 72.08 72.50 −0.59 19.75
Grampian 68.26 64.58 5.70 20.83
Lothian 64.53 61.34 5.20 25.61
Tayside 61.97 56.09 10.48 22.20
Avg 73.31 69.68 22.32
Data sourced from 2018-2023 Prescription Records

2.0.2 Key Findings

  • Prescription Per Capita (Exam vs. Non-Exam):
    • Most health boards show higher per capita prescription rates during exam seasons (e.g., Tayside: +10.48%, Fife: +7.77%).
    • Lanarkshire shows a slight decrease (-0.59%), indicating other influencing factors beyond academic stress.
  • Percentage Change:
    • Regions like Tayside and Fife exhibit the most significant increases in prescription rates during exam seasons, suggesting exam stress plays a critical role.
    • Lothian, despite having the highest percentage of young adults (25.61%), does not exhibit high prescription rates, highlighting potential differences in prescribing practices or other regional factors.
  • Demographic Correlation:
    • While regions with a high percentage of young adults (e.g., Greater Glasgow and Clyde: 25.06%) often show elevated prescription rates, Lothian is an exception, emphasizing the need to explore other contributing factors.

3 Processing Data for Exam and Non-Exam Seasons

3.1 Define a Function to Process the Data

The following function processes the exam and non-exam datasets: - Cleans column names. - Joins with demographic data (HB_names, population_data, and age_data). - Filters for SSRIs and SNRIs commonly prescribed for exam-related stress. - Focuses on specific health boards with high percentages of young adults. - Calculates total prescriptions and per-person prescription rates.

# Define a function to process the data
process_data <- function(data, season) {
  data %>%
    clean_names() %>%
    mutate(bnf_item_description = str_remove(bnf_item_description, "[_\\s].*")) %>%
    full_join(HB_names) %>%
    full_join(population_data) %>%
    full_join(age_data) %>%
    filter(str_detect(bnf_item_description, "ESCITALOPRAM|SERTRALINE|FLUOXETINE|VENLAFAXINE|PAROXETINE|CITALOPRAM")) %>%
    filter(str_detect(hb_name, "Lothian|Grampian|Greater Glasgow and Clyde|Tayside|Fife|Forth Valley|Lanarkshire")) %>%
    group_by(hb_name, bnf_item_description, totalpeople, percentage, hb_population) %>%
    summarise(total_prescription = sum(paid_quantity, na.rm = TRUE)) %>%
    filter(!is.na(bnf_item_description)) %>%
    mutate(
      Season = season,
      per_person = total_prescription / hb_population
    )
}

# Process exam and non-exam data
examszndata1 <- process_data(examszndata, "examseason")
nonexamszndata1 <- process_data(nonexamszndata, "nonexamseason")

# Combine the datasets
combined_data <- bind_rows(examszndata1, nonexamszndata1) %>%
  filter(!is.na(bnf_item_description))
library(forcats)
library(ggplot2)
library(plotly)

# Create the ggplot without text labels
p <- ggplot(combined_data, aes(
  x = total_prescription, 
  y = fct_reorder(bnf_item_description, total_prescription, .fun = sum, .desc = TRUE),  # Reorder drugs in descending order
  fill = Season
)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
  labs(
    title = "Comparison of Total Prescriptions by Drug",
    x = "Total Prescriptions",
    y = "Drug",
    fill = "Season"
  ) +
  theme_minimal() +
  facet_wrap(~fct_reorder(hb_name, total_prescription, .fun = sum, .desc = TRUE), ncol = 2)  # Reorder health boards in descending order

# Make the plot interactive with only `total_prescription` and `Season` in the tooltip
interactive_plot <- ggplotly(p, tooltip = c("x", "fill"))

# Display the interactive plot
interactive_plot

3.2 Results: Seasonal Variations in SSRI and SNRI Prescriptions Across Scottish Health Boards

3.2.2 Health Board Variability

  • Greater Glasgow and Clyde and Lothian have the highest total prescriptions, correlating with larger student populations.
  • Fife, Forth Valley, and Tayside show lower totals but follow similar seasonal patterns.

3.2.3 Drug-Specific Observations

  • Sertraline, Fluoxetine, and Citalopram are the most commonly prescribed drugs, while Escitalopram and Paroxetine have lower rates.

3.2.4 Seasonal Prescription Differences

  • Exam seasons show a slight increase in prescriptions, highlighting academic stress as a driver for mental health support.

3.2.5 Potential Correlations

  • Health boards with higher proportions of young adults (e.g., Greater Glasgow and Clyde) tend to have higher prescription totals, linking demographics to prescription trends.

3.3 Combining and Analyzing Prescription Data Across Exam and Non-Exam Periods

3.3.1 Merging and Cleaning the Data

The following code combines the exam and non-exam datasets (examszndata and nonexamszndata) into a single dataset, joinedfulldata. Key steps include: - Cleaning column names for consistency. - Joining with demographic datasets (HB_names and population_data) to enrich the data with health board names and population details. - Filtering the data to focus on: - Six health boards with a high prevalence of young adults: Lothian, Grampian, Greater Glasgow and Clyde, Tayside, Fife, Forth Valley, and Lanarkshire. - Six commonly prescribed SSRIs and SNRIs: Escitalopram, Sertraline, Fluoxetine, Venlafaxine, Paroxetine, and Citalopram. - Extracting year and categorizing data into four academic periods: Semester 1 Finals, Semester 2 Finals, Semester 1 Start, and Semester 2 Start. - Grouping by year and period for analysis.

joinedfulldata <- examszndata %>% 
  full_join(nonexamszndata)

joinedfulldata <- joinedfulldata %>% 
  clean_names() %>% 
  full_join(HB_names) %>% 
  full_join(population_data) %>% 
  select(hb_name, bnf_item_description, paid_date_month, paid_quantity) %>% 
  filter(str_detect(bnf_item_description, "ESCITALOPRAM|SERTRALINE|FLUOXETINE|VENLAFAXINE|PAROXETINE|CITALOPRAM"))%>%
  filter(str_detect(hb_name, "Lothian|Grampian|Greater Glasgow and Clyde|Tayside|Fife|Forth Valley|Lanarkshire")) %>%
   mutate(bnf_item_description = str_remove(bnf_item_description, "[_\\s].*")) %>% 
  mutate(
     year = substr(paid_date_month, 1, 4),
  period = case_when(
    substr(paid_date_month, 5, 6) %in% c("04", "05") ~ "Semester 1 Finals",
    substr(paid_date_month, 5, 6) %in% c("11", "12") ~ "Semester 2 Finals",
    substr(paid_date_month, 5, 6) %in% c("01", "02") ~ "Semester 2 Start",
    substr(paid_date_month, 5, 6) %in% c("09", "10") ~ "Semester 1 Start",
    TRUE ~ NA_character_
  )) %>% 
  group_by(year, period)